FieldTermStack.java example

Explorer
mdrill-master
- trunk
package org.apache.lucene.search.vectorhighlight;
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Collections;
import java.util.LinkedList;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.index.TermVectorOffsetInfo;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

/**
 * <code>FieldTermStack</code> is a stack that keeps query terms in the specified field
 * of the document to be highlighted.
 */
public class FieldTermStack {
  
  private final String fieldName;
  LinkedList<TermInfo> termList = new LinkedList<TermInfo>();
  
  public static void main( String[] args ) throws Exception {
    Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT,  "f", analyzer );
    Query query = parser.parse( "a x:b" );
    FieldQuery fieldQuery = new FieldQuery( query, true, false );
    
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer));
    Document doc = new Document();
    doc.add( new Field( "f", "a a a b b c a b b c d e f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
    doc.add( new Field( "f", "b a b a f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
    writer.addDocument( doc );
    writer.close();
    
    IndexReader reader = IndexReader.open( dir, true );
    new FieldTermStack( reader, 0, "f", fieldQuery );
    reader.close();
  }

  /**
   * a constructor.
   * 
   * @param reader IndexReader of the index
   * @param docId document id to be highlighted
   * @param fieldName field of the document to be highlighted
   * @param fieldQuery FieldQuery object
   * @throws IOException
   */
  public FieldTermStack( IndexReader reader, int docId, String fieldName, final FieldQuery fieldQuery ) throws IOException {
    this.fieldName = fieldName;

    Set<String> termSet = fieldQuery.getTermSet( fieldName );
    // just return to make null snippet if un-matched fieldName specified when fieldMatch == true
    if( termSet == null ) return;

    TermFreqVector tfv = reader.getTermFreqVector( docId, fieldName );
    if( tfv == null ) return; // just return to make null snippets
    TermPositionVector tpv = null;
    try{
      tpv = (TermPositionVector)tfv;
    }
    catch( ClassCastException e ){
      return; // just return to make null snippets
    }
    
    
    for( String term : tpv.getTerms() ){
      if( !termSet.contains( term ) ) continue;
      int index = tpv.indexOf( term );
      TermVectorOffsetInfo[] tvois = tpv.getOffsets( index );
      if( tvois == null ) return; // just return to make null snippets
      int[] poss = tpv.getTermPositions( index );
      if( poss == null ) return; // just return to make null snippets
      for( int i = 0; i < tvois.length; i++ )
        termList.add( new TermInfo( term, tvois[i].getStartOffset(), tvois[i].getEndOffset(), poss[i] ) );
    }
    
    // sort by position
    Collections.sort( termList );
  }

  /**
   * @return field name
   */
  public String getFieldName(){
    return fieldName;
  }

  /**
   * @return the top TermInfo object of the stack
   */
  public TermInfo pop(){
    return termList.poll();
  }

  /**
   * @param termInfo the TermInfo object to be put on the top of the stack
   */
  public void push( TermInfo termInfo ){
    // termList.push( termInfo );  // avoid Java 1.6 feature
    termList.addFirst( termInfo );
  }

  /**
   * to know whether the stack is empty
   * 
   * @return true if the stack is empty, false if not
   */
  public boolean isEmpty(){
    return termList == null || termList.size() == 0;
  }
  
  public static class TermInfo implements Comparable<TermInfo>{

    final String text;
    final int startOffset;
    final int endOffset;
    final int position;

    TermInfo( String text, int startOffset, int endOffset, int position ){
      this.text = text;
      this.startOffset = startOffset;
      this.endOffset = endOffset;
      this.position = position;
    }
    
    public String getText(){ return text; }
    public int getStartOffset(){ return startOffset; }
    public int getEndOffset(){ return endOffset; }
    public int getPosition(){ return position; }
    
    @Override
    public String toString(){
      StringBuilder sb = new StringBuilder();
      sb.append( text ).append( '(' ).append(startOffset).append( ',' ).append( endOffset ).append( ',' ).append( position ).append( ')' );
      return sb.toString();
    }

    public int compareTo( TermInfo o ) {
      return ( this.position - o.position );
    }
  }
}